home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Cream of the Crop 1
/
Cream of the Crop 1.iso
/
PROGRAM
/
FDF101.ARJ
/
FDF.C
< prev
next >
Wrap
C/C++ Source or Header
|
1992-04-29
|
11KB
|
484 lines
/*
* fdf.c
*
* find duplicates. searches a given path and its sub-directories for
* duplicate files. Duplicate files have the same name, size, date and
* contents. However, the definition used by this program can be almost any
* user-specified combination of the above.
*
* Roy Bixler (original development and Atari ST version, maintenance)
* Ayman Barakat (idea)
* David Oertel (MS-DOS version)
*
* Version 1.0: March 11, 1991 (known as 'mfd - Monk find duplicates')
* Version 1.01: April 12, 1992 (now 'fdf - find duplicate files')
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 1, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <dir.h>
#include <dos.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "fdfcomm.h"
#include "fdfstat.h"
#include "elib.h"
#include "fdf.h"
HASH_LIST *H_list[HASH_TAB_SIZE];
/*
* print_help
*
* prints out the help message for the program
*/
void print_help()
{
printf(FDF_USAGE, PROG_NAME, PROG_NAME);
}
/*
* show_doc
*
* show full documentation
*/
void show_doc()
{
printf(FDF_SCHPIEL);
}
/*
* add_to_hash_table
*
* given a file and a hash table index, add the given file to the hash table
* at the given index. Addition is done by putting the new entry at the
* head of a linked list.
*/
void add_to_hash_table(FILE_LIST *f_name, int h_idx)
{
HASH_LIST *new_entry = malloc((size_t) sizeof(HASH_LIST));
if (new_entry != NULL) {
new_entry->f_name = f_name;
new_entry->f_name->printed = 0;
new_entry->next = H_list[h_idx];
H_list[h_idx] = new_entry;
}
}
/*
* gen_hash
*
* given a linked list repesenting a list of files, generate a hash table for
* it.
*/
void gen_hash(FILE_LIST *flist)
{
for (;flist != NULL; flist = flist->next) {
if (!flist->added) {
if (v_flag) {
update_total_bytes(flist->dta.ff_fsize);
update_num_files();
}
if ((Match_criteria) & (NAMES_MATCH))
add_to_hash_table(flist, hashpjw(flist->dta.ff_name));
else if ((Match_criteria) & (SIZES_MATCH))
add_to_hash_table(flist, flist->dta.ff_fsize % HASH_TAB_SIZE);
else if ((Match_criteria) & (TIMES_MATCH))
add_to_hash_table(flist,
((unsigned long) (flist->dta.ff_fdate << 16) +
(unsigned long) (flist->dta.ff_ftime)) %
HASH_TAB_SIZE);
flist->added = 1;
}
}
}
/*
* find_duplicated_name
*
* scans the generated hash table for names which occur twice (or more).
* Takes a pointer to hash table index to start the search at, modifies this
* on return to indicate where it stopped looking (either because of end of
* hash table or because a duplicated name found). Return pointer to first
* occurrence of duplicated name found or NULL if none found.
*/
HASH_LIST *find_duplicated_name(int *h_idx, HASH_LIST *last_found)
{
int i;
HASH_LIST *anchor, *cur;
anchor = last_found;
for (i = *h_idx; i < HASH_TAB_SIZE; i++)
if (H_list[i] != NULL) {
if (anchor == NULL)
anchor = H_list[i];
for (; anchor != NULL; anchor = anchor->next)
if (!anchor->f_name->printed)
for (cur = anchor->next; cur != NULL; cur = cur->next)
if ((!cur->f_name->printed) &&
(cmpflist_eq(anchor->f_name, cur->f_name))) {
*h_idx = i;
return anchor;
}
}
*h_idx = i;
return NULL;
}
/*
* gen_id_menu
*
* given a starting point in the file list, generate an interactive delete
* menu. Return number of items put into the menu.
*/
int gen_id_menu(HASH_LIST *name_duped, FILE_LIST **menu, int max_items)
{
int n_found = 0;
long n_bytes = 0L;
HASH_LIST *cur;
for (cur = name_duped->next;
((cur != NULL) && (n_found < max_items));
cur = cur->next)
if ((!cur->f_name->printed) &&
(files_match(name_duped->f_name, cur->f_name))) {
if (n_found == 0) {
if (v_flag)
n_bytes += name_duped->f_name->dta.ff_fsize;
menu[n_found++] = name_duped->f_name;
}
if (v_flag)
n_bytes += cur->f_name->dta.ff_fsize;
menu[n_found++] = cur->f_name;
}
if ((n_found) && (v_flag)) {
update_num_which_dupd();
update_num_dups(n_found, n_bytes);
}
return n_found;
}
/*
* id_dups
*
* given a pointer to a name which has been determined to be duplicated, print
* all the names and their path's out and ask the user which ones to delete.
*/
void id_dups(HASH_LIST *start)
{
int n_found, i, n_del;
FILE_LIST *cur, *menu[N_INTERACTIVE];
char menu_sel[MAX_STR], which_del[N_INTERACTIVE];
if (!(n_found = gen_id_menu(start, menu, N_INTERACTIVE)))
return;
while (1) {
print_id_menu(menu, n_found);
printf("\nEnter list of files to delete (hit CR for none)\n");
fgets(menu_sel, MAX_STR-1, stdin);
zap_trailing_nl(menu_sel, MAX_STR-1, stdin);
if (!mark_list(menu_sel, which_del, n_found)) {
for (n_del=0, i=0; i < n_found; i++)
if (which_del[i])
if (!delete_path_name_file(menu[i]->path,
menu[i]->dta.ff_name, '\0')) {
n_del++;
if (v_flag) {
update_total_del_bytes(menu[i]->dta.ff_fsize);
if (v_flag > 1) {
printf("Deleted ");
print_fpath(menu[i]->path,
menu[i]->dta.ff_name);
printf("\n");
}
}
}
break;
}
}
if (n_del)
printf("\n");
}
/*
* print_dups
*
* given a pointer to a name which has been determined to be duplicated, print
* all the names and their path's out.
*/
void print_dups(HASH_LIST *name_duped) /* now, who's being duped here? */
{
HASH_LIST *cur;
for (cur = name_duped->next; cur != NULL; cur = cur->next)
if ((!cur->f_name->printed) &&
(files_match(name_duped->f_name, cur->f_name))) {
if (!name_duped->f_name->printed) {
if (v_flag) {
update_num_which_dupd();
update_num_dups(1U, name_duped->f_name->dta.ff_fsize);
}
print_match_header(name_duped->f_name);
print_next_match(name_duped->f_name, -1);
}
if (v_flag)
update_num_dups(1U, cur->f_name->dta.ff_fsize);
print_next_match(cur->f_name, -1);
}
if (name_duped->f_name->printed)
printf("\n");
}
/*
* find_non_printed
*
* given a pointer to a FILE_LIST, return the pointer to the next element which
* has not been printed yet.
*/
HASH_LIST *find_non_printed(HASH_LIST *file)
{
for (; ((file != NULL) && (file->f_name->printed)); file = file->next);
return file;
}
/*
* find_dups
*
* given a path, find the duplicate files and dump them to the standard output.
*/
void find_dups()
{
HASH_LIST *last_found, *f_found;
int i;
gen_hash(F_list);
i = 0;
last_found = NULL;
while ((i < HASH_TAB_SIZE) &&
((f_found = find_duplicated_name(&i, last_found)) != NULL)) {
if (i_flag)
id_dups(f_found);
else
print_dups(f_found);
last_found = find_non_printed(f_found->next);
}
}
/*
* init_hash
*
* insure the hash table is empty
*/
void init_hash()
{
int i;
for (i=0; i<HASH_TAB_SIZE; i++)
H_list[i] = NULL;
}
/*
* set_sort_hash_criteria
*
* must guarantee that no matter what the matching criteria is, that
* duplicate files will always go to the same hash table location.
*
* This is called to make sure that, when comparing two entries in the same
* hash table bucket (i.e when calling 'cmpflist_eq()'), appropriate criteria
* are used to determine if the two entries can possibly match. There is no
* real sorting with the hash table!
*/
void set_sort_hash_criteria()
{
if ((Match_criteria) & (NAMES_MATCH))
Sort_criteria = NAME_SORT;
else if ((Match_criteria) & (SIZES_MATCH))
Sort_criteria = SIZE_SORT;
else if ((Match_criteria) & (TIMES_MATCH))
Sort_criteria = TIME_SORT;
}
/*
* get_options
*
* get the command-line options, check for consistency and set the appropriate
* variables.
*/
int get_options(int argc, char **argv)
{
extern int getopt(int argc, char **argv, char *opts);
extern int Optind;
extern char *optarg;
int optchar;
char a_flag = 0, c_flag = 0, d_flag = 0, n_flag = 0, s_flag = 0;
if (argc < 2) {
print_help();
exit(-1);
}
while ((optchar = getopt(argc, argv, GETOPT_LIST)) != EOF) {
if (isupper(optchar))
optchar = tolower(optchar);
switch (optchar) {
case 'i':
i_flag = 1;
break;
case 'l':
l_flag = 1;
break;
case 'm':
if ((optarg == NULL) || (strpbrk(optarg, "AaCcDdNnSs") != optarg)) {
printf("%s: must specify 'a' or 'c', 'd', 'n' and/or 's' after -m\n",
PROG_NAME);
print_help();
exit(-1);
}
for (;*optarg != '\0'; optarg++) {
if (isupper(*optarg))
*optarg = tolower(*optarg);
switch (*optarg) {
case 'a':
a_flag = 1;
break;
case 'c':
c_flag = 1;
Match_criteria |= CONTENTS_MATCH;
break;
case 'd':
d_flag = 1;
Match_criteria |= TIMES_MATCH;
break;
case 'n':
n_flag = 1;
Match_criteria |= NAMES_MATCH;
break;
case 's':
s_flag = 1;
Match_criteria |= SIZES_MATCH;
break;
default:
printf("%s: invalid match criteria '%c' specified\n",
PROG_NAME, *optarg);
print_help();
exit(-1);
}
}
break;
case 'v':
v_flag++;
break;
case '?':
show_doc();
exit(0);
default:
print_help();
exit(-1);
}
}
if (argc == Optind) {
printf("%s: at least one path specification required\n", PROG_NAME);
print_help();
exit(-1);
}
else if (a_flag)
if ((c_flag) || (d_flag) || (n_flag) || (s_flag)) {
printf("%s: -ma option conflicts with -mc, -md, -mn or -ms\n",
PROG_NAME);
print_help();
exit(-1);
}
else
Match_criteria = ALL_MATCH;
else if (!Match_criteria)
Match_criteria = (TIMES_MATCH|SIZES_MATCH|NAMES_MATCH);
else if (Match_criteria & CONTENTS_MATCH)
Match_criteria |= SIZES_MATCH;
set_sort_hash_criteria();
return Optind;
}
int main(int argc, char **argv)
{
int i;
char form_path[MAXPATH];
init_hash();
for (i = get_options(argc, argv); i < argc; i++) {
format_dir(argv[i], '\0', form_path);
if (v_flag > 1)
printf("finding duplicates under %s:\n\n", form_path);
list_files(PROG_NAME, form_path, (char) 0);
}
find_dups();
if (v_flag)
print_stats();
return(0);
}